import numpy as np
import pandas as pd
from itertools import cycle
from scipy import interp
import more_itertools as mit
# sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Tensorflow
import tensorflow as tf
tf.random.set_seed(123)
# Timer
from timeit import default_timer as timer
# Visualisation libraries
## IPython
from IPython.display import clear_output
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex
## seaborn
import seaborn as sns
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
sns.set_style('whitegrid')
## matplotlib
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
from matplotlib.font_manager import FontProperties
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
# %config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings("ignore")
In this article, we demonstrate solving a classification problem in TensorFlow with Estimators, using the Heart Disease Dataset from the UCI Machine Learning Repository.

Picture Source: harvard.edu
The object of the exercise is to develop a predictive model that can predict whether heart disease is present or absent based on the rest of the given features.
# Load the raw Statlog (Heart) data file: space-delimited, no header row.
Data = np.genfromtxt('heart-disease/heart.dat', delimiter=' ')
Attributes = ['Age', 'Sex', 'Chest Pain Type', 'Resting Blood Pressure', 'Serum Cholestoral',
              'Fasting Blood Sugar', 'Resting Electrocardiographic Results', 'Maximum Heart Rate Achieved',
              'Exercise Induced Angina', 'Oldpeak', 'Slope',
              'Number of Major Vessels', 'Thal', 'Heart Disease']
Data = pd.DataFrame(data = Data, columns = Attributes)
#
# genfromtxt parses everything as float; recode the categorical features as
# strings (via int first, to drop the trailing '.0') so downstream code can
# tell them apart from the truly numeric columns by dtype.
Temp = ['Sex', 'Chest Pain Type', 'Fasting Blood Sugar', 'Resting Electrocardiographic Results',
        'Exercise Induced Angina', 'Slope', 'Number of Major Vessels','Thal']
for c in Temp:
    Data[c] = Data[c].astype(int).astype(str)
del Temp, c
Target = 'Heart Disease'
Labels = ['Absent', 'Present']
# Remap the target from its on-disk coding to {0, 1} by subtracting 1
# (0 = absent, 1 = present, matching the Labels list above).
Data['Heart Disease'] = (Data['Heart Disease']-1).astype(int)
#
display(Data.head(5))
display(pd.DataFrame({'Number of Instances': [Data.shape[0]], 'Number of Attributes': [Data.shape[1]]}).style.hide_index())
def Data_info(Inp, Only_NaN = False):
    """Summarise each column of Inp: dtype, NaN count, size and completeness.

    Parameters
    ----------
    Inp : pandas.DataFrame
        The dataset to summarise.
    Only_NaN : bool, default False
        When True, keep only the columns that actually contain missing values.

    Returns
    -------
    pandas.DataFrame
        Indexed by feature name, with columns 'Data Type',
        'Number of NaN Values', 'Size' and 'Percentage' (of non-missing rows).
    """
    summary = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
    nan_counts = Inp.isnull().sum().to_frame(name='Number of NaN Values')
    summary = summary.join(nan_counts, how='outer')
    summary['Size'] = Inp.shape[0]
    # Completeness = 100 minus the (rounded) percentage of missing entries.
    summary['Percentage'] = 100 - np.round(100 * (summary['Number of NaN Values'] / Inp.shape[0]), 2)
    summary.index.name = 'Features'
    # Stringify dtypes so they display (and group) cleanly in plots.
    summary['Data Type'] = summary['Data Type'].astype(str)
    if Only_NaN:
        summary = summary.loc[summary['Number of NaN Values'] > 0]
    return summary
# Maps
# Human-readable labels for the integer-coded categorical features;
# used later to annotate the per-patient contribution plots.
Maps = {'Sex': {'0':'Female', '1':'Male'},
        'Chest Pain Type': {'1':'Typical Angina', '2':'Atypical Angina', '3': 'Non-Anginal Pain', '4':'Asymptomatic'},
        'Fasting Blood Sugar': {'0': 'False', '1': 'True'}, 'Exercise Induced Angina': {'0': 'No', '1': 'Yes'},
        'Slope': {'1': 'Upsloping', '2': 'Flat', '3': 'Downsloping'},
        'Thal': {'3': 'Normal', '6': 'Fixed Defect','7': 'Reversable Defect'}}
| Age | Sex | Chest Pain Type | Resting Blood Pressure | Serum Cholestoral | Fasting Blood Sugar | Resting Electrocardiographic Results | Maximum Heart Rate Achieved | Exercise Induced Angina | Oldpeak | Slope | Number of Major Vessels | Thal | Heart Disease | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 70.0 | 1 | 4 | 130.0 | 322.0 | 0 | 2 | 109.0 | 0 | 2.4 | 2 | 3 | 3 | 1 |
| 1 | 67.0 | 0 | 3 | 115.0 | 564.0 | 0 | 2 | 160.0 | 0 | 1.6 | 2 | 0 | 7 | 0 |
| 2 | 57.0 | 1 | 2 | 124.0 | 261.0 | 0 | 0 | 141.0 | 0 | 0.3 | 1 | 0 | 7 | 1 |
| 3 | 64.0 | 1 | 4 | 128.0 | 263.0 | 0 | 0 | 105.0 | 1 | 0.2 | 2 | 1 | 7 | 0 |
| 4 | 74.0 | 0 | 2 | 120.0 | 269.0 | 0 | 2 | 121.0 | 1 | 0.2 | 1 | 1 | 3 | 0 |
| Number of Instances | Number of Attributes |
|---|---|
| 270 | 14 |
# Completeness overview: one bar per feature showing the percentage of
# non-missing values, colored and labelled by dtype.
data_info = Data_info(Data).reset_index(drop = False)
fig = px.bar(data_info, x= 'Features', y= 'Percentage', color = 'Data Type', text = 'Data Type',
             color_discrete_sequence = ['PaleGreen', 'LightBlue', 'PeachPuff'], hover_data = data_info.columns)
fig.update_layout(plot_bgcolor= 'white', legend=dict(x=1, y=.5, traceorder="normal", bordercolor="DarkGray", borderwidth=1))
# Leading spaces push the dtype label away from the bar edge.
fig.update_traces(texttemplate= 6*' ' + '%{label}', textposition='inside')
fig.update_traces(marker_line_color= 'Black', marker_line_width=1., opacity=1)
fig.show()
# A copy of the Dataframe
df = Data.copy()
# Replace spaces with underscores in column names — presumably because
# TF feature-column names with spaces are problematic; confirm if reused.
df.columns = [x.replace(' ','_') for x in df.columns]
Temp = Target.replace(' ','_')
X = df.drop(columns = Temp)
y = df[Temp].values
del df, Temp
# Fraction of the data held out for the test set.
Test_Size = 0.3
def Sets_Plot(Data, Test_Size):
    """Draw a single stacked horizontal bar visualising the train/test split.

    NOTE: the set sizes are recomputed here from Test_Size with int()
    truncation rather than taken from the actual split arrays.
    """
    split = pd.DataFrame({'Set': ['Train', 'Test'],
                          'Number of Instances': [int(Data.shape[0]*(1-Test_Size)), int(Data.shape[0]*Test_Size)]})
    split['Percentage'] = np.round(100* split['Number of Instances'].values /split['Number of Instances'].sum(), 2)
    # Empty y labels collapse both rows onto one stacked bar.
    fig = px.bar(split, y= ['',''], x= 'Number of Instances', orientation='h', color = 'Set', text = 'Percentage',
                 color_discrete_sequence = ['PaleGreen', 'LightBlue'], height = 180)
    fig.update_layout(plot_bgcolor= 'white', legend_orientation='h', legend=dict(x=0, y=1.7),
                      xaxis = dict(tickmode = 'array', tickvals = [0, Data.shape[0]], ticktext = ['','']))
    fig.update_traces(marker_line_color= 'Black', marker_line_width=1.5, opacity=1)
    fig.update_traces(texttemplate='%{text:.2}% ', textposition='inside')
    fig.update_xaxes(title_text=None, range=[0, Data.shape[0]])
    fig.update_yaxes(title_text=None)
    fig.show()
# 70/30 train/test split; fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= Test_Size, random_state=42)
display(pd.DataFrame(data={'Set':['X_train','X_test','y_train','y_test'],
                           'Shape':[X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).set_index('Set').T)
Sets_Plot(Data, Test_Size)
| Set | X_train | X_test | y_train | y_test |
|---|---|---|---|---|
| Shape | (189, 13) | (81, 13) | (189,) | (81,) |
Create the feature columns, using the original numeric columns as is and one-hot-encoding categorical variables.
def Feat_Columns(Inp, Numeric = False, disp_dtype = False):
    '''
    Build the TensorFlow feature-column list for a DataFrame.

    Columns whose dtype is in `Numeric` (by default the int/float families)
    become numeric columns; object-dtype columns become one-hot indicator
    columns over their observed vocabulary.

    Input: Dataset (pandas DataFrame)
    Output: Tensorflow Feature Column List
    '''
    if not Numeric:
        Numeric = ['int64', 'int32', 'float64', 'float32']
    dtype_table = Inp.dtypes.reset_index(drop = False)
    dtype_table.columns = ['Features', 'Data Type']
    dtype_table['Data Type'] = dtype_table['Data Type'].astype(str)
    # Partition features by dtype.
    Numeric_Columns = dtype_table.loc[dtype_table['Data Type'].isin(Numeric), 'Features'].tolist()
    Categorical_Columns = dtype_table.loc[dtype_table['Data Type'] == 'object', 'Features'].tolist()
    if disp_dtype:
        display(pd.DataFrame({'Numeric Columns': [', '.join(Numeric_Columns)],
                              'Categorical Columns': [', '.join(Categorical_Columns)]}, index = ['Columns']).T.style)
    # Categorical features first (one-hot over observed values), then numeric.
    feature_columns = []
    for feature_name in Categorical_Columns:
        vocabulary = Inp[feature_name].unique()
        feature_columns.append(tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary)))
    for feature_name in Numeric_Columns:
        feature_columns.append(tf.feature_column.numeric_column(feature_name))
    return feature_columns
The input function specifies how data is converted to a tf.data.Dataset that feeds the input pipeline in a streaming fashion. In other words, an input function is a function that returns a tf.data.Dataset object which outputs a two-element tuple: a dictionary of feature names mapped to their values, and the corresponding labels.
def make_input_fn(X, y, inmemory_train = False, n_epochs= None, shuffle=True, batch_size = 256):
    """Build an Estimator input_fn over features X (DataFrame) and labels y.

    With inmemory_train=True the returned function hands the whole dataset
    over at once (features as a dict of columns, labels as a column vector);
    otherwise it streams a tf.data.Dataset with optional shuffling, n_epochs
    repeats and batches of batch_size.
    """
    if inmemory_train:
        # In-memory training: no batching; labels reshaped to (n, 1).
        labels = np.expand_dims(y, axis=1)
        def input_fn():
            return dict(X), labels
        return input_fn
    # Streaming path via tf.data.
    def input_fn():
        ds = tf.data.Dataset.from_tensor_slices((X.to_dict(orient='list'), y))
        if shuffle:
            ds = ds.shuffle(1000)
        return ds.repeat(n_epochs).batch(batch_size)
    return input_fn
my_feature_columns = Feat_Columns(X)
# Training and evaluation input functions.
train_input_fn = make_input_fn(X_train, y_train)
eval_input_fn = make_input_fn(X_test, y_test, shuffle=False, n_epochs=1)
# Classifier
tf.keras.backend.clear_session()
IT = int(1e3)  # upper bound on training steps
params = {'n_trees': 50, 'max_depth': 3, 'n_batches_per_layer': 1, 'center_bias': True}
classifier = tf.estimator.BoostedTreesClassifier(my_feature_columns, **params)
# Train model.
start = timer()
classifier.train(train_input_fn, max_steps = IT)
CPU_Time = timer() - start
# Evaluation.
results = classifier.evaluate(eval_input_fn)
clear_output()
# Append wall-clock training time to the metrics table before displaying.
results['CPU Time'] = CPU_Time
display(pd.DataFrame(results, index = ['']).round(4))
| accuracy | accuracy_baseline | auc | auc_precision_recall | average_loss | label/mean | loss | precision | prediction/mean | recall | global_step | CPU Time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.7778 | 0.6049 | 0.8865 | 0.8565 | 0.4267 | 0.3951 | 0.4267 | 0.7917 | 0.3231 | 0.5938 | 172 | 5.7731 |
An alternative way to train a model with boosting performance is to use the train_in_memory feature. However, if there is no issue with performance, or long training time is not a concern, training without this feature is recommended [5]. Furthermore, our observations have shown that using train_in_memory does not always increase the performance of the training.
# Same tree parameters as before, adapted for in-memory training.
in_memory_params = dict(params)
in_memory_params['n_batches_per_layer'] = 1
# In-memory input_fn does not use batching.
train_input_fn = make_input_fn(X_train, y_train, inmemory_train = True)
# Classifier
tf.keras.backend.clear_session()
classifier = tf.estimator.BoostedTreesClassifier(my_feature_columns, train_in_memory=True, **in_memory_params)
# Train model.
start = timer()
classifier.train(train_input_fn, max_steps = IT)
CPU_Time = timer() - start
# Evaluation.
results = classifier.evaluate(eval_input_fn)
clear_output()
# Append wall-clock training time to the metrics table before displaying.
results['CPU Time'] = CPU_Time
display(pd.DataFrame(results, index = ['']).round(4))
| accuracy | accuracy_baseline | auc | auc_precision_recall | average_loss | label/mean | loss | precision | prediction/mean | recall | global_step | CPU Time | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.7778 | 0.6049 | 0.8791 | 0.8402 | 0.4493 | 0.3951 | 0.4493 | 0.7692 | 0.3399 | 0.625 | 152 | 7.6655 |
We can investigate the feature importance of an artificial classification task. This is similar to that of scikit-learn and has been outlined in [6].
# Directional feature contributions (DFCs): per-example, per-feature
# contributions to the predicted probability, plus the model bias.
pred_dicts = list(classifier.experimental_predict_with_explanations(eval_input_fn))
clear_output()
# Create DFC Pandas dataframe.
labels = y_test
# Predicted probability of the positive class (index 1) for each example.
probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])
df_dfc = pd.DataFrame([pred['dfc'] for pred in pred_dicts])
# Styled summary: diverging gradients on mean/std, separate colormaps on min/max.
display(df_dfc.describe().T.style.background_gradient(subset= ['mean'], cmap='RdYlGn')\
        .background_gradient(subset= ['std'], cmap='RdYlGn')\
        .background_gradient(subset= ['min'], cmap='hot')\
        .background_gradient(subset= ['max'], cmap='winter').set_precision(4))
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Number_of_Major_Vessels | 81.0000 | 0.0084 | 0.0890 | -0.1439 | -0.0530 | -0.0272 | 0.0559 | 0.2279 |
| Oldpeak | 81.0000 | -0.0356 | 0.1130 | -0.1624 | -0.0978 | -0.0811 | -0.0072 | 0.4197 |
| Chest_Pain_Type | 81.0000 | -0.0093 | 0.0831 | -0.1366 | -0.0779 | -0.0483 | 0.0765 | 0.1480 |
| Serum_Cholestoral | 81.0000 | -0.0176 | 0.0562 | -0.1421 | -0.0436 | -0.0244 | 0.0029 | 0.1748 |
| Slope | 81.0000 | -0.0000 | 0.0407 | -0.1121 | -0.0319 | -0.0175 | 0.0304 | 0.0811 |
| Thal | 81.0000 | -0.0300 | 0.0935 | -0.2528 | -0.0963 | -0.0635 | 0.0551 | 0.2391 |
| Resting_Blood_Pressure | 81.0000 | 0.0120 | 0.0761 | -0.2181 | -0.0251 | -0.0062 | 0.0295 | 0.3317 |
| Maximum_Heart_Rate_Achieved | 81.0000 | -0.0103 | 0.0516 | -0.2139 | -0.0276 | -0.0101 | 0.0130 | 0.1150 |
| Resting_Electrocardiographic_Results | 81.0000 | 0.0054 | 0.0167 | -0.0276 | -0.0027 | 0.0000 | 0.0095 | 0.0639 |
| Sex | 81.0000 | -0.0050 | 0.0480 | -0.1398 | -0.0444 | 0.0067 | 0.0220 | 0.1114 |
| Exercise_Induced_Angina | 81.0000 | -0.0198 | 0.0423 | -0.1072 | -0.0467 | -0.0333 | -0.0117 | 0.1118 |
| Age | 81.0000 | -0.0240 | 0.0543 | -0.1478 | -0.0584 | -0.0343 | 0.0016 | 0.1604 |
| Fasting_Blood_Sugar | 81.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 |
A nice property of DFCs is that the sum of the contributions + the bias is equal to the prediction for a given example.
# Sum of DFCs + bias == probability.
bias = pred_dicts[0]['bias']
dfc_prob = df_dfc.sum(axis=1) + bias
# Sanity check: per-example DFC sums plus the bias must reproduce the
# predicted probabilities (raises AssertionError otherwise).
np.testing.assert_almost_equal(dfc_prob.values, probs.values)
Plot the DFCs for an individual patient, color-coded by the directionality of each contribution, and add the corresponding feature values to the figure.
def _add_feature_values(feature_values, ax, colors):
    """Display feature's values on left of plot."""
    left_edge = ax.get_xlim()[0]
    OFFSET = 0.15
    # One annotation per feature row, boxed in that row's bar color.
    for row, (name, value) in enumerate(feature_values.items()):
        label = plt.text(left_edge, row - OFFSET, '{}'.format(value), size=12)
        label.set_bbox(dict(facecolor= colors[row], alpha=0.25))
    font = FontProperties()
    # font.set_weight('bold')
    # Column header, placed one row above the last annotation.
    plt.text(left_edge, row + 1 - OFFSET, 'Feature\nValue', fontproperties=font, size=13)
def _yaxis_labels(ax):
y_labels = []
for c in [c.get_text() for c in ax.get_yticklabels()]:
List = list(mit.locate(c, lambda x: x == " "))
if len(List)>1:
List = List[1::2]
Temp1 = list(c)
for position in List:
Temp1[position] = '\n'
c = "".join(Temp1)
y_labels.append(c)
return y_labels
def _xLims(ax):
Temp = np.linspace(-1,1,21, endpoint=True)
Temp = np.round(Temp,1)
xlims = ax.get_xlim()
for l, r in list(zip(Temp[:-1],Temp[1:])):
if l<= xlims[0] < r:
Left = l
if l<= xlims[1] < r:
Right = r
return [Left, Right]
def Plot_Example(example, TOP_N = 10, Pos_Color = 'LimeGreen', Neg_Color = 'OrangeRed', Maps = Maps, FS = (13, 7)):
    """Bar-plot the TOP_N largest (by magnitude) DFC contributions of one example.

    Positive contributions are drawn in Pos_Color, negative in Neg_Color,
    and the patient's raw feature values are annotated on the left.

    NOTE(review): besides `example` (a Series of per-feature DFCs), this
    function reads the module-level globals `ID` and `X_test` to look up the
    patient's raw feature values — `ID` must match the row that `example`
    was taken from; confirm at the call site.
    Returns the matplotlib Axes.
    """
    # Mutates the caller's Series index in place: underscores back to spaces.
    example.index = [x.replace('_',' ') for x in example.index]
    # Sorting by absolute value
    sorted_ix = example.abs().sort_values()[-TOP_N:].index
    example = example[sorted_ix]
    fig, ax = plt.subplots(1, 1, figsize= FS)
    Temp = example.to_frame('Value').sort_index(ascending= False)
    # Draw black outlines first, colored hatched fills on top, then outlines
    # again so the borders stay visible over the fills.
    _ = Temp.plot(kind='barh', color='None', edgecolor = 'Black', legend=None, alpha=1, lw=1.2, ax = ax)
    _ = Temp.loc[Temp['Value']>=0].plot(kind='barh', color= Pos_Color, edgecolor = 'white', hatch = '///',
                                        legend=None, alpha=0.75, ax = ax)
    _ = Temp.loc[Temp['Value']<0].plot(kind='barh', color= Neg_Color, edgecolor = 'white', hatch = '///',
                                       legend=None, alpha=0.75, ax = ax)
    _ = Temp.plot(kind='barh', color='None', edgecolor = 'Black', legend=None, alpha=1, lw=1.2, ax = ax)
    del Temp
    _ = ax.grid(False, axis='y')
    # Y axis Labels
    _ = ax.set_yticklabels(_yaxis_labels(ax), size=12)
    # x axis Limits
    _ = ax.set_xlim(_xLims(ax))
    # Add feature values.
    # Translate integer codes to readable labels before annotating.
    Temp = X_test.copy()
    Temp.columns = [x.replace('_',' ') for x in Temp.columns]
    for c in Maps.keys():
        Temp[c] = Temp[c].map(Maps[c])
    colors = example.map(lambda x: Pos_Color if x >= 0 else Neg_Color).tolist()
    _add_feature_values(Temp.iloc[ID][sorted_ix], ax, colors)
    return ax
# Plot results.
# ID = np.random.choice(len(y_test), 1)[0]
ID = 61  # fixed example patient from the test set (Plot_Example reads this global)
Tops = X_train.shape[1]  # show all features
ax = Plot_Example(df_dfc.iloc[ID], TOP_N = Tops, FS = (13, 9))
_ = ax.set_title('Feature contributions for example patient {} from the Test set\n Pred: {:1.2f}; Label: {}'
                 .format(ID, probs[ID], labels[ID]))
_ = ax.set_xlabel('Contribution to Predicted Probability', size=14)
# Gain-based feature importances from the trained model, normalized to sum to 1.
importances = classifier.experimental_feature_importances(normalize=True)
df_imp = pd.Series(importances)
# Dead code removed here: a Temp frame was built and never used
# (Plot_FeatImportance rebuilds it internally), and
# experimental_feature_importances was called a second time with
# identical arguments.
def Plot_FeatImportance(pds, TOP_N = 10, FS = (13, 6)):
    """Horizontal bar chart of the first TOP_N entries of an importance Series.

    Returns the matplotlib Axes so callers can tweak limits/titles.
    """
    frame = pds.iloc[0:TOP_N][::-1].reset_index()
    frame.columns = ['Features','Importance']
    frame = frame.sort_values(by=['Importance'])
    frame['Features'] = frame['Features'].map(lambda s: s.replace('_',' '))
    fig, ax = plt.subplots(1, 1, figsize = FS)
    palette = sns.color_palette("RdYlGn", TOP_N)
    # Hatched color fill first, then a second pass drawing only the outlines.
    _ = sns.barplot(ax = ax, x='Importance', y= 'Features', data= frame, palette=palette, hatch = '//')
    _ = sns.barplot(ax = ax, x='Importance', y= 'Features', data= frame, facecolor = 'None', edgecolor = 'Indigo')
    _ = ax.grid(False, axis='y')
    # Wrap long feature names onto multiple lines.
    _ = ax.set_yticklabels(_yaxis_labels(ax), size=12)
    # Snap x limits to the 0.1 grid.
    _ = ax.set_xlim(_xLims(ax))
    return ax
# Gain-based importances for all features.
ax = Plot_FeatImportance(pd.Series(importances), TOP_N = Tops, FS = (13, 7.5))
_ = ax.set_xlim(right = .16)
# Mean absolute DFC across the test set as an alternative global importance.
ax = Plot_FeatImportance(df_dfc.abs().mean(), TOP_N = Tops, FS = (13, 7.5))
_ = ax.set_xlim(right = .1)
def permutation_importances(est, X_eval, y_test, metric, features):
    """Column by column, shuffle values and observe effect on eval set.
    source: http://explained.ai/rf-importance/index.html
    A similar approach can be done during training. See "Drop-column importance"
    in the above article.

    Returns a numpy array of (baseline - shuffled) metric drops, one per
    feature, in the order of `features`. X_eval is restored after each column.
    """
    baseline = metric(est, X_eval, y_test)
    drops = []
    for feature in features:
        original_column = X_eval[feature].copy()
        # Shuffle one column, re-score, then restore it so X_eval is unchanged.
        X_eval[feature] = np.random.permutation(X_eval[feature])
        drops.append(baseline - metric(est, X_eval, y_test))
        X_eval[feature] = original_column
    return np.array(drops)
def accuracy_metric(est, X, y):
    """TensorFlow estimator accuracy.

    Evaluates `est` once over (X, y) — unshuffled, single epoch — and
    returns the 'accuracy' entry of the metrics dict.
    """
    input_fn = make_input_fn(X, y=y, shuffle=False, n_epochs=1)
    return est.evaluate(input_fn=input_fn)['accuracy']
features = X_train.columns.tolist()
# Permutation importance: accuracy drop on the test set when each feature's
# values are shuffled in turn.
importances = permutation_importances(classifier, X_test, y_test, accuracy_metric,
                                      features)
df_imp = pd.Series(importances, index=features)
clear_output()
_ = Plot_FeatImportance(df_imp)
Detrano, R., Janosi, A., Steinbrunn, W., Pfisterer, M., Schmid, J.J., Sandhu, S., Guppy, K.H., Lee, S. and Froelicher, V., 1989. International application of a new probability algorithm for the diagnosis of coronary artery disease. The American journal of cardiology, 64(5), pp.304-310.
Aha, D. and Kibler, D., 1988. Instance-based prediction of heart-disease presence with the Cleveland database. University of California, 3(1), pp.3-2.
Gennari, J.H., Langley, P. and Fisher, D., 1989. Models of incremental concept formation. Artificial intelligence, 40(1-3), pp.11-61.